In [1]:
# !git lfs install
# !git clone https://huggingface.co/datasets/parambharat/tamil_asr_corpus

# !add-apt-repository -y ppa:jonathonf/ffmpeg-4
# !apt update
# !apt install -y ffmpeg

# !pip uninstall -y transformers datasets 
# !pip install audiomentations
# !pip install git+https://github.com/huggingface/datasets
# !pip install git+https://github.com/huggingface/transformers
# !pip install librosa soundfile
# !pip install "evaluate>=0.3.0"
# !pip install jiwer
# !pip install more-itertools
# !pip install wandb
# !pip install bitsandbytes
# !pip install "holoviews[recommended]"
In [2]:
# W&B configuration: upload checkpoints as artifacts, watch all gradients/params
%set_env WANDB_LOG_MODEL=True
%set_env WANDB_WATCH=all
%set_env WANDB_NOTEBOOK_NAME=whisper_small_ta.ipynb
env: WANDB_LOG_MODEL=True
env: WANDB_WATCH=all
env: WANDB_NOTEBOOK_NAME=whisper_small_ta.ipynb
In [3]:
import numbers
import string
from dataclasses import dataclass
from io import StringIO
from typing import Any, Dict, List, Union

import numpy as np
import torch
import wandb
from audiomentations import Compose, AddGaussianNoise, TimeStretch, PitchShift, Shift
from IPython.display import clear_output
from torch.utils.data import IterableDataset
In [4]:
from transformers import WhisperForConditionalGeneration
from transformers import WhisperProcessor
from transformers import Seq2SeqTrainingArguments
from transformers import Seq2SeqTrainer
from transformers import WhisperTokenizer
from transformers import WhisperFeatureExtractor
from huggingface_hub import notebook_login
from transformers import TrainerCallback
from transformers.integrations import WandbCallback
from transformers.trainer_pt_utils import IterableDatasetShard
from datasets import Dataset, IterableDatasetDict, load_dataset, interleave_datasets, Audio 
from datasets import load_dataset, Audio
import evaluate
In [5]:
from pathlib import Path
import pandas as pd
import holoviews as hv
import panel as pn
import tempfile
from bokeh.resources import INLINE
hv.extension("bokeh", logo=False)
import jiwer
In [6]:
# sanity check: a GPU is visible to torch before training
torch.cuda.is_available()
Out[6]:
True
In [7]:
# wandb.login()
In [8]:
# notebook_login()
In [9]:
# resume a specific existing W&B run (id pinned) so metrics continue the same curves
run = wandb.init(project="whisper_finetuning", job_type="fine-tuning", group="small-ta", resume="must", id="17xqqp5b")
wandb: Currently logged in as: parambharat. Use `wandb login --relogin` to force relogin
VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016671159649983263, max=1.0…
Tracking run with wandb version 0.13.6
Run data is saved locally in /home/ubuntu/whisper-finetuning/notebooks/wandb/run-20221212_073428-17xqqp5b
Resuming run quiet-sun-53 to Weights & Biases (docs)
In [10]:
def load_data_splits(is_streaming=True, stopping_strategy="all_exhausted"):
    """Load the local Tamil ASR corpus.

    Parameters
    ----------
    is_streaming : bool
        If True, stream the dataset lazily instead of materialising it.
    stopping_strategy : str
        Unused here; kept for interface compatibility with loader variants
        that interleave multiple corpora.

    Returns
    -------
    A (Iterable)DatasetDict with the corpus splits.
    """
    # NOTE: path is relative to the notebook's working directory
    return load_dataset("../data/tamil_asr_corpus/", streaming=is_streaming)
In [11]:
dataset_dict = load_data_splits()
In [12]:
# stochastic waveform augmentation chain; each transform fires with p=0.3
augment_waveform = Compose([
    AddGaussianNoise(min_amplitude=0.005, max_amplitude=0.015, p=0.3),
    TimeStretch(min_rate=0.8, max_rate=1.25, p=0.3, leave_length_unchanged=False),
    PitchShift(min_semitones=-4, max_semitones=4, p=0.3)
    ,])

def augment_dataset(batch):
    """Run the stochastic augmentation pipeline on one example's waveform,
    overwriting the raw audio array in place and returning the batch."""
    samples = batch["audio"]["array"]
    # apply the augmentation chain at the corpus sampling rate
    batch["audio"]["array"] = augment_waveform(samples=samples, sample_rate=16000)
    return batch


# call augment dataset on the training set
# (lazy for a streaming dataset: applied on iteration, not eagerly)
dataset_dict["train"] = dataset_dict["train"].map(augment_dataset)
In [13]:
# log-Mel feature extractor for whisper-small
feature_extractor = WhisperFeatureExtractor.from_pretrained(
    "openai/whisper-small"
)
# tokenizer configured for Tamil transcription; 225 matches the label/generation max length used below
tokenizer = WhisperTokenizer.from_pretrained(
    "openai/whisper-small", 
     language="Tamil",
     task="transcribe",
     model_max_length=225
)
# combined processor (feature extractor + tokenizer) used by the data collator and Trainer
processor = WhisperProcessor.from_pretrained(
    "openai/whisper-small",
     language="Tamil", 
     task="transcribe",
     model_max_length=225
)
In [14]:
def fix_sentence(sentence):
    """Normalize a transcript for training.

    Strips a wrapping pair of double quotes, guarantees the sentence ends in
    '.', '?' or '!', and removes all other punctuation from the body.

    Parameters
    ----------
    sentence : str
        Raw transcript text.

    Returns
    -------
    str
        Cleaned transcript ending in terminal punctuation.
    """
    transcription = sentence

    if transcription.startswith('"') and transcription.endswith('"'):
        # we can remove wrapping quotation marks as they do not affect the transcription
        transcription = transcription[1:-1]

    # BUG FIX: an empty input, or a quote-only input like '""' (which becomes
    # empty after the strip above), crashed on transcription[-1]
    if not transcription:
        return "."

    if transcription[-1] not in [".", "?", "!"]:
        # append a full-stop to sentences that do not end in punctuation
        transcription = transcription + "."

    # strip punctuation from the body while keeping the final punctuation mark
    transcription = transcription[:-1].translate(
        str.maketrans("", "", string.punctuation)
    ) + transcription[-1]
    return transcription
    
def prepare_dataset(examples):
    """Turn one raw example into model inputs: log-Mel `input_features`
    from the audio array, and tokenized `labels` from the cleaned sentence."""
    audio = examples["audio"]

    # log-Mel spectrogram for a single waveform (batch of one -> index 0)
    extracted = feature_extractor(audio["array"], sampling_rate=16000)
    examples["input_features"] = extracted.input_features[0]

    # normalise the transcript before tokenization
    cleaned = fix_sentence(examples["sentence"])

    # encode the target text to label ids, truncated to the model max length
    examples["labels"] = tokenizer(cleaned, max_length=225, truncation=True).input_ids
    return examples
In [15]:
def filter_empty_strings(sentence):
    """Keep only sentences that are at least two characters long."""
    return len(sentence) >= 2
In [16]:
# drop examples with empty/one-character transcripts from every split
for k in dataset_dict:
    dataset_dict[k] = dataset_dict[k].filter(filter_empty_strings, input_columns=["sentence"])
In [17]:
# featurize/tokenize every split; yield torch tensors on iteration
for k in dataset_dict:
    dataset_dict[k] = dataset_dict[k].map(
        prepare_dataset,).with_format("torch")
In [18]:
dataset_dict["train"] = dataset_dict["train"].shuffle(buffer_size=500)
In [19]:
@dataclass
class DataCollatorSpeechSeq2SeqWithPadding:
    """Collate examples into a padded batch for Whisper seq2seq training.

    Audio features and label ids are padded independently; label padding is
    replaced with -100 so the loss ignores it, and a leading BOS token is
    stripped because the model prepends it during teacher forcing.
    """
    # combined WhisperProcessor exposing .feature_extractor and .tokenizer
    processor: Any

    def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:
        # split inputs and labels since they have to be of different lengths and need different padding methods
        # first treat the audio inputs by simply returning torch tensors
        input_features = [{"input_features": feature["input_features"]} for feature in features]
        batch = self.processor.feature_extractor.pad(input_features, return_tensors="pt")

        # get the tokenized label sequences
        # NOTE(review): truncate_sequences with default args removes 0 tokens,
        # and labels were already truncated to 225 at tokenization time —
        # verify this call is actually needed
        label_features = [{"input_ids": self.processor.tokenizer.truncate_sequences(feature["labels"])[0]}
                          for feature in features]
        # pad the labels to max length
        
        labels_batch = self.processor.tokenizer.pad(label_features, return_tensors="pt",)

        # replace padding with -100 to ignore loss correctly
        labels = labels_batch["input_ids"].masked_fill(labels_batch.attention_mask.ne(1), -100)

        # if bos token is appended in previous tokenization step,
        # cut bos token here as it's append later anyways
        if (labels[:, 0] == self.processor.tokenizer.bos_token_id).all().cpu().item():
            labels = labels[:, 1:]

        batch["labels"] = labels

        return batch
In [20]:
# instantiate the collator with the combined Whisper processor
data_collator = DataCollatorSpeechSeq2SeqWithPadding(processor=processor)
In [21]:
# word error rate metric from the HF `evaluate` hub
metric = evaluate.load("wer")

# evaluate with the 'normalised' WER
do_normalize_eval = True


def compute_metrics(pred):
    """Compute word error rate (as a percentage) for a prediction batch."""
    pred_ids = pred.predictions
    label_ids = pred.label_ids

    # -100 is the loss-ignore index; restore pad tokens before decoding
    label_ids[label_ids == -100] = processor.tokenizer.pad_token_id

    # decode without grouping tokens so WER is computed on plain text
    decode = processor.tokenizer.batch_decode
    pred_str = decode(pred_ids, skip_special_tokens=True, normalize=do_normalize_eval)
    label_str = decode(label_ids, skip_special_tokens=True, normalize=do_normalize_eval)

    return {"wer": 100 * metric.compute(predictions=pred_str, references=label_str)}
In [22]:
# resume from the model artifact of the same W&B run; the KV cache is
# disabled for training (presumably because gradient checkpointing is on — see training args)
model = WhisperForConditionalGeneration.from_pretrained("./artifacts/model-17xqqp5b:v0", use_cache=False)
model.config.forced_decoder_ids = None
model.config.suppress_tokens = []
model.config.use_cache = False
In [23]:
# trainer callback to reinitialise and reshuffle the streamable datasets at the beginning of each epoch
class ShuffleCallback(TrainerCallback):
    """Bump the epoch of a streaming dataset at each epoch start so its
    shuffle buffer yields a new order on the next pass."""

    def on_epoch_begin(self, args, state, control, train_dataloader, **kwargs):
        # the shard wrapper is checked first so its epoch is not bumped twice
        if isinstance(train_dataloader.dataset, IterableDatasetShard):
            pass  # set_epoch() is handled by the Trainer
        elif isinstance(train_dataloader.dataset, IterableDataset):
            # advance the internal epoch counter to trigger a reshuffle
            train_dataloader.dataset.set_epoch(train_dataloader.dataset._epoch + 1)
            
In [24]:
def load_samples_dataset(dataset, num_samples=100):
    """Materialise the first `num_samples` items of a (streaming) dataset
    into a regular in-memory `Dataset`."""
    samples = []
    for item in dataset:
        samples.append(item)
        # stop once exactly num_samples items have been collected
        if len(samples) == num_samples:
            break
    return Dataset.from_list(samples)

def compute_spectrograms(example):
    """Return the unpadded log-Mel spectrogram of one audio example."""
    audio_array = example["audio"]["array"]
    extracted = feature_extractor(audio_array, sampling_rate=16000, padding="do_not_pad")
    return {"spectrogram": extracted.input_features[0]}


def record_to_html(sample_record):
    """Render one sample (audio player, waveform, spectrogram, with a synced
    playback cursor) to a standalone HTML buffer for W&B logging."""
    audio_array = np.array(sample_record["audio"]["array"])
    audio_sr = sample_record["audio"]["sampling_rate"]
    audio_duration = sample_record["length"]
    audio_spectrogram = np.array(sample_record["spectrogram"])

    # image bounds: x spans the clip duration, y spans up to the spectrogram max
    bounds = (0,0, audio_duration, audio_spectrogram.max())

    # panel's Audio pane expects 16-bit integer samples
    waveform_int = np.int16(audio_array * 32767)

    
    
    hv_audio = pn.pane.Audio(waveform_int, sample_rate=audio_sr, name='Audio', throttle=500)
    
    # hidden slider acts as the shared "playback position" value
    slider = pn.widgets.FloatSlider(end=audio_duration, visible=False, step=0.001)
    line_audio = hv.VLine(0).opts(color='black')
    line_spec = hv.VLine(0).opts(color='red')
    
    
    # JS-side links: audio time <-> slider (two-way), slider -> cursor lines
    slider.jslink(hv_audio, value='time', bidirectional=True)
    slider.jslink(line_audio, value='glyph.location')
    slider.jslink(line_spec, value='glyph.location')
    
    time = np.linspace(0, audio_duration, num=len(audio_array))
    line_plot_hv = hv.Curve(
        (time, audio_array), ["Time (s)", "amplitude"]).opts(
        width=500, height=150, axiswise=True) * line_audio
    
    hv_spec_gram = hv.Image(
        audio_spectrogram, bounds=(bounds), kdims=["Time (s)", "Frequency (hz)"]).opts(
        width=500, height=150, labelled=[], axiswise=True, color_levels=512)* line_spec
    
    
    # save the combined layout into an in-memory HTML buffer
    combined = pn.Row(hv_audio, hv_spec_gram, line_plot_hv, slider)
    audio_html = StringIO()
    combined.save(audio_html)
    return audio_html


def dataset_to_records(dataset):
    """Build a DataFrame with one row per sample: rendered audio+spectrogram
    HTML, the reference sentence, and the clip length."""
    rows = [
        {
            "audio_with_spec": wandb.Html(record_to_html(item)),
            "sentence": item["sentence"],
            "length": item["length"],
        }
        for item in dataset
    ]
    return pd.DataFrame(rows)
    
def decode_predictions(trainer, predictions):
    """Decode generated token ids into plain strings via the trainer's tokenizer."""
    return trainer.tokenizer.batch_decode(
        predictions.predictions, skip_special_tokens=True
    )


def compute_measures(predictions, labels):
    """Per-example jiwer error measures (WER, hits, subs, dels, ins) as a DataFrame."""
    columns = ["wer", "hits", "substitutions", "deletions", "insertions"]
    rows = []
    # jiwer expects (truth, hypothesis) order
    for pred, label in zip(predictions, labels):
        rows.append(jiwer.compute_measures(label, pred))
    return pd.DataFrame(rows)[columns]

class WandbProgressResultsCallback(WandbCallback):
    """WandbCallback that additionally logs a table of sample predictions
    (audio, reference, prediction, per-example WER measures) on every
    logging step, and uploads saved checkpoints as W&B model artifacts."""

    def __init__(self, trainer, sample_dataset):
        super().__init__()
        self.trainer = trainer
        self.sample_dataset = sample_dataset
        # static columns (audio player HTML, sentence, length) rendered once;
        # predictions are recomputed at each logging step
        self.records_df = dataset_to_records(sample_dataset)

    def on_log(self, args, state, control, model=None, logs=None, **kwargs):
        """Predict on the sample set and log a W&B table keyed by global step."""
        super().on_log(args, state, control, model, logs)
        # BUG FIX: was `trainer.predict(...)`, silently relying on a
        # module-level global; use the trainer this callback was built with
        predictions = self.trainer.predict(self.sample_dataset)
        predictions = decode_predictions(self.trainer, predictions)
        measures_df = compute_measures(predictions, self.records_df["sentence"].tolist())
        records_df = pd.concat([self.records_df, measures_df], axis=1)
        records_df["prediction"] = predictions
        records_df["step"] = state.global_step
        records_table = self._wandb.Table(dataframe=records_df)
        self._wandb.log({"sample_predictions": records_table})

    def on_save(self, args, state, control, model=None, tokenizer=None, **kwargs):
        """Upload the just-saved checkpoint as a versioned artifact (main process only)."""
        if self._wandb is None:
            return
        if self._log_model and self._initialized and state.is_world_process_zero:
            with tempfile.TemporaryDirectory() as temp_dir:
                self.trainer.save_model(temp_dir)
                # metadata: either all numeric run-summary values, or the
                # best-model metrics when load_best_model_at_end is set
                # (requires `import numbers` at the top of the notebook,
                # which the original imports cell was missing)
                metadata = (
                    {
                        k: v
                        for k, v in dict(self._wandb.summary).items()
                        if isinstance(v, numbers.Number) and not k.startswith("_")
                    }
                    if not args.load_best_model_at_end
                    else {
                        f"eval/{args.metric_for_best_model}": state.best_metric,
                        "train/total_floss": state.total_flos,
                    }
                )
                artifact = self._wandb.Artifact(
                    name=f"model-{self._wandb.run.id}",
                    type="model", metadata=metadata)
                for f in Path(temp_dir).glob("*"):
                    if f.is_file():
                        with artifact.new_file(f.name, mode="wb") as fa:
                            fa.write(f.read_bytes())
                self._wandb.run.log_artifact(artifact)
In [32]:
# effective train batch size = 32 * 2 (gradient accumulation) = 64
training_args = Seq2SeqTrainingArguments(
    output_dir="../models/whisper-small-ta",  # change to a repo name of your choice
    per_device_train_batch_size=32,
    gradient_accumulation_steps=2,  # increase by 2x for every 2x decrease in batch size
    learning_rate=1e-5,
    save_total_limit=4,
    warmup_steps=500,
    max_steps=5000,
    gradient_checkpointing=True,
    fp16=True,
#     fp16_full_eval=True,
    optim="adamw_bnb_8bit",  # 8-bit Adam (bitsandbytes) to cut optimizer memory
    evaluation_strategy="steps",
    per_device_eval_batch_size=16,
    predict_with_generate=True,
    generation_max_length=225,
    save_steps=500,
    eval_steps=500,
    logging_steps=250,
    report_to="none",  # W&B logging is attached manually via WandbProgressResultsCallback
    load_best_model_at_end=True,
    metric_for_best_model="wer",
    greater_is_better=False,  # lower WER is better
    push_to_hub=True,
    hub_strategy="checkpoint",
    remove_unused_columns=False, 
    ignore_data_skip=True
)
PyTorch: setting up devices
In [26]:
# materialise 100 test examples and precompute spectrograms for the W&B table
samples_dataset = load_samples_dataset(dataset_dict["test"]).map(compute_spectrograms)
  0%|          | 0/100 [00:00<?, ?ex/s]
In [27]:
trainer = Seq2SeqTrainer(
    args=training_args,
    model=model,
    train_dataset=dataset_dict["train"],
    # NOTE(review): eval runs on the same 100-sample subset used for the
    # W&B prediction table, not a full held-out split — confirm intentional
    eval_dataset=samples_dataset,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    
    # the processor is passed as `tokenizer` so it is saved with checkpoints
    tokenizer=processor,
    callbacks=[ShuffleCallback()],
    
)
/home/ubuntu/whisper-finetuning/notebooks/../models/whisper-small-ta is already a clone of https://huggingface.co/parambharat/whisper-small-ta. Make sure you pull the latest changes with `repo.git_pull()`.
max_steps is given, it will override any value given in num_train_epochs
Using cuda_amp half precision backend
In [28]:
# build the sample-prediction callback (renders audio/spectrogram HTML for 100 examples)
progress_callback = WandbProgressResultsCallback(trainer, samples_dataset)
# hide the verbose rendering output from the cell
clear_output()
In [29]:
# register after construction because the callback needs the trainer instance itself
trainer.add_callback(progress_callback)
In [30]:
# model.save_pretrained(training_args.output_dir)
# processor.save_pretrained(training_args.output_dir)
In [31]:
# start the 5000-step fine-tuning run
trainer.train()
***** Running training *****
  Num examples = 320000
  Num Epochs = 9223372036854775807
  Instantaneous batch size per device = 32
  Total train batch size (w. parallel, distributed & accumulation) = 64
  Gradient Accumulation steps = 2
  Total optimization steps = 5000
  Number of trainable parameters = 241734912
Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"
[5000/5000 23:59:59, Epoch 1/9223372036854775807]
Step Training Loss Validation Loss Wer
500 0.337400 0.257853 23.380419
1000 0.290000 0.226021 20.993668
1500 0.252200 0.213904 20.068193
2000 0.233800 0.202510 19.678519
2500 0.223000 0.197901 18.314661
3000 0.211000 0.192680 17.827569
3500 0.203200 0.186459 17.389187
4000 0.197800 0.183870 17.535314
4500 0.197200 0.181212 17.096931
5000 0.189400 0.180317 17.145641

[7/7 01:24]
***** Running Prediction *****
  Num examples = 100
  Batch size = 16
***** Running Prediction *****
  Num examples = 100
  Batch size = 16
***** Running Evaluation *****
  Num examples = 100
  Batch size = 16
***** Running Prediction *****
  Num examples = 100
  Batch size = 16
Saving model checkpoint to ../models/whisper-small-ta/checkpoint-500
Configuration saved in ../models/whisper-small-ta/checkpoint-500/config.json
Model weights saved in ../models/whisper-small-ta/checkpoint-500/pytorch_model.bin
Feature extractor saved in ../models/whisper-small-ta/checkpoint-500/preprocessor_config.json
tokenizer config file saved in ../models/whisper-small-ta/checkpoint-500/tokenizer_config.json
Special tokens file saved in ../models/whisper-small-ta/checkpoint-500/special_tokens_map.json
added tokens file saved in ../models/whisper-small-ta/checkpoint-500/added_tokens.json
Feature extractor saved in ../models/whisper-small-ta/preprocessor_config.json
tokenizer config file saved in ../models/whisper-small-ta/tokenizer_config.json
Special tokens file saved in ../models/whisper-small-ta/special_tokens_map.json
added tokens file saved in ../models/whisper-small-ta/added_tokens.json
Saving model checkpoint to /tmp/tmp6eo55h_b
Configuration saved in /tmp/tmp6eo55h_b/config.json
Model weights saved in /tmp/tmp6eo55h_b/pytorch_model.bin
Feature extractor saved in /tmp/tmp6eo55h_b/preprocessor_config.json
tokenizer config file saved in /tmp/tmp6eo55h_b/tokenizer_config.json
Special tokens file saved in /tmp/tmp6eo55h_b/special_tokens_map.json
added tokens file saved in /tmp/tmp6eo55h_b/added_tokens.json
Saving model checkpoint to ../models/whisper-small-ta
Configuration saved in ../models/whisper-small-ta/config.json
Model weights saved in ../models/whisper-small-ta/pytorch_model.bin
Feature extractor saved in ../models/whisper-small-ta/preprocessor_config.json
tokenizer config file saved in ../models/whisper-small-ta/tokenizer_config.json
Special tokens file saved in ../models/whisper-small-ta/special_tokens_map.json
added tokens file saved in ../models/whisper-small-ta/added_tokens.json
Dropping the following result as it does not have all the necessary fields:
{'task': {'name': 'Automatic Speech Recognition', 'type': 'automatic-speech-recognition'}}
Several commits (2) will be pushed upstream.
The progress bars may be unreliable.
Upload file pytorch_model.bin:   0%|          | 32.0k/922M [00:00<?, ?B/s]
remote: Scanning LFS files for validity, may be slow...        
remote: LFS file scan complete.        
To https://huggingface.co/parambharat/whisper-small-ta
   a6819b0..07ab463  main -> main

***** Running Prediction *****
  Num examples = 100
  Batch size = 16
***** Running Prediction *****
  Num examples = 100
  Batch size = 16
***** Running Evaluation *****
  Num examples = 100
  Batch size = 16
***** Running Prediction *****
  Num examples = 100
  Batch size = 16
Saving model checkpoint to ../models/whisper-small-ta/checkpoint-1000
Configuration saved in ../models/whisper-small-ta/checkpoint-1000/config.json
Model weights saved in ../models/whisper-small-ta/checkpoint-1000/pytorch_model.bin
Feature extractor saved in ../models/whisper-small-ta/checkpoint-1000/preprocessor_config.json
tokenizer config file saved in ../models/whisper-small-ta/checkpoint-1000/tokenizer_config.json
Special tokens file saved in ../models/whisper-small-ta/checkpoint-1000/special_tokens_map.json
added tokens file saved in ../models/whisper-small-ta/checkpoint-1000/added_tokens.json
Feature extractor saved in ../models/whisper-small-ta/preprocessor_config.json
tokenizer config file saved in ../models/whisper-small-ta/tokenizer_config.json
Special tokens file saved in ../models/whisper-small-ta/special_tokens_map.json
added tokens file saved in ../models/whisper-small-ta/added_tokens.json
Saving model checkpoint to /tmp/tmpe3_fznmd
Configuration saved in /tmp/tmpe3_fznmd/config.json
Model weights saved in /tmp/tmpe3_fznmd/pytorch_model.bin
Feature extractor saved in /tmp/tmpe3_fznmd/preprocessor_config.json
tokenizer config file saved in /tmp/tmpe3_fznmd/tokenizer_config.json
Special tokens file saved in /tmp/tmpe3_fznmd/special_tokens_map.json
added tokens file saved in /tmp/tmpe3_fznmd/added_tokens.json
Saving model checkpoint to ../models/whisper-small-ta
Configuration saved in ../models/whisper-small-ta/config.json
Model weights saved in ../models/whisper-small-ta/pytorch_model.bin
Feature extractor saved in ../models/whisper-small-ta/preprocessor_config.json
tokenizer config file saved in ../models/whisper-small-ta/tokenizer_config.json
Special tokens file saved in ../models/whisper-small-ta/special_tokens_map.json
added tokens file saved in ../models/whisper-small-ta/added_tokens.json
Dropping the following result as it does not have all the necessary fields:
{'task': {'name': 'Automatic Speech Recognition', 'type': 'automatic-speech-recognition'}}
Several commits (2) will be pushed upstream.
The progress bars may be unreliable.
Upload file pytorch_model.bin:   0%|          | 32.0k/922M [00:00<?, ?B/s]
remote: Scanning LFS files for validity, may be slow...        
remote: LFS file scan complete.        
To https://huggingface.co/parambharat/whisper-small-ta
   07ab463..8a53b5e  main -> main

***** Running Prediction *****
  Num examples = 100
  Batch size = 16
***** Running Prediction *****
  Num examples = 100
  Batch size = 16
***** Running Evaluation *****
  Num examples = 100
  Batch size = 16
***** Running Prediction *****
  Num examples = 100
  Batch size = 16
Saving model checkpoint to ../models/whisper-small-ta/checkpoint-1500
Configuration saved in ../models/whisper-small-ta/checkpoint-1500/config.json
Model weights saved in ../models/whisper-small-ta/checkpoint-1500/pytorch_model.bin
Feature extractor saved in ../models/whisper-small-ta/checkpoint-1500/preprocessor_config.json
tokenizer config file saved in ../models/whisper-small-ta/checkpoint-1500/tokenizer_config.json
Special tokens file saved in ../models/whisper-small-ta/checkpoint-1500/special_tokens_map.json
added tokens file saved in ../models/whisper-small-ta/checkpoint-1500/added_tokens.json
Feature extractor saved in ../models/whisper-small-ta/preprocessor_config.json
tokenizer config file saved in ../models/whisper-small-ta/tokenizer_config.json
Special tokens file saved in ../models/whisper-small-ta/special_tokens_map.json
added tokens file saved in ../models/whisper-small-ta/added_tokens.json
Saving model checkpoint to /tmp/tmpm7i5h07i
Configuration saved in /tmp/tmpm7i5h07i/config.json
Model weights saved in /tmp/tmpm7i5h07i/pytorch_model.bin
Feature extractor saved in /tmp/tmpm7i5h07i/preprocessor_config.json
tokenizer config file saved in /tmp/tmpm7i5h07i/tokenizer_config.json
Special tokens file saved in /tmp/tmpm7i5h07i/special_tokens_map.json
added tokens file saved in /tmp/tmpm7i5h07i/added_tokens.json
Saving model checkpoint to ../models/whisper-small-ta
Configuration saved in ../models/whisper-small-ta/config.json
Model weights saved in ../models/whisper-small-ta/pytorch_model.bin
Feature extractor saved in ../models/whisper-small-ta/preprocessor_config.json
tokenizer config file saved in ../models/whisper-small-ta/tokenizer_config.json
Special tokens file saved in ../models/whisper-small-ta/special_tokens_map.json
added tokens file saved in ../models/whisper-small-ta/added_tokens.json
Dropping the following result as it does not have all the necessary fields:
{'task': {'name': 'Automatic Speech Recognition', 'type': 'automatic-speech-recognition'}}
Several commits (2) will be pushed upstream.
The progress bars may be unreliable.
Upload file pytorch_model.bin:   0%|          | 32.0k/922M [00:00<?, ?B/s]
remote: Scanning LFS files for validity, may be slow...        
remote: LFS file scan complete.        
To https://huggingface.co/parambharat/whisper-small-ta
   8a53b5e..dcdf420  main -> main

***** Running Prediction *****
  Num examples = 100
  Batch size = 16
***** Running Prediction *****
  Num examples = 100
  Batch size = 16
***** Running Evaluation *****
  Num examples = 100
  Batch size = 16
***** Running Prediction *****
  Num examples = 100
  Batch size = 16
Saving model checkpoint to ../models/whisper-small-ta/checkpoint-2000
Configuration saved in ../models/whisper-small-ta/checkpoint-2000/config.json
Model weights saved in ../models/whisper-small-ta/checkpoint-2000/pytorch_model.bin
Feature extractor saved in ../models/whisper-small-ta/checkpoint-2000/preprocessor_config.json
tokenizer config file saved in ../models/whisper-small-ta/checkpoint-2000/tokenizer_config.json
Special tokens file saved in ../models/whisper-small-ta/checkpoint-2000/special_tokens_map.json
added tokens file saved in ../models/whisper-small-ta/checkpoint-2000/added_tokens.json
Feature extractor saved in ../models/whisper-small-ta/preprocessor_config.json
tokenizer config file saved in ../models/whisper-small-ta/tokenizer_config.json
Special tokens file saved in ../models/whisper-small-ta/special_tokens_map.json
added tokens file saved in ../models/whisper-small-ta/added_tokens.json
Saving model checkpoint to /tmp/tmptax5iloo
Configuration saved in /tmp/tmptax5iloo/config.json
Model weights saved in /tmp/tmptax5iloo/pytorch_model.bin
Feature extractor saved in /tmp/tmptax5iloo/preprocessor_config.json
tokenizer config file saved in /tmp/tmptax5iloo/tokenizer_config.json
Special tokens file saved in /tmp/tmptax5iloo/special_tokens_map.json
added tokens file saved in /tmp/tmptax5iloo/added_tokens.json
Saving model checkpoint to ../models/whisper-small-ta
Configuration saved in ../models/whisper-small-ta/config.json
Model weights saved in ../models/whisper-small-ta/pytorch_model.bin
Feature extractor saved in ../models/whisper-small-ta/preprocessor_config.json
tokenizer config file saved in ../models/whisper-small-ta/tokenizer_config.json
Special tokens file saved in ../models/whisper-small-ta/special_tokens_map.json
added tokens file saved in ../models/whisper-small-ta/added_tokens.json
Dropping the following result as it does not have all the necessary fields:
{'task': {'name': 'Automatic Speech Recognition', 'type': 'automatic-speech-recognition'}}
Several commits (2) will be pushed upstream.
The progress bars may be unreliable.
Upload file pytorch_model.bin:   0%|          | 32.0k/922M [00:00<?, ?B/s]
remote: Scanning LFS files for validity, may be slow...        
remote: LFS file scan complete.        
To https://huggingface.co/parambharat/whisper-small-ta
   dcdf420..8422cd0  main -> main

***** Running Prediction *****
  Num examples = 100
  Batch size = 16
***** Running Prediction *****
  Num examples = 100
  Batch size = 16
***** Running Evaluation *****
  Num examples = 100
  Batch size = 16
***** Running Prediction *****
  Num examples = 100
  Batch size = 16
Saving model checkpoint to ../models/whisper-small-ta/checkpoint-2500
Configuration saved in ../models/whisper-small-ta/checkpoint-2500/config.json
Model weights saved in ../models/whisper-small-ta/checkpoint-2500/pytorch_model.bin
Feature extractor saved in ../models/whisper-small-ta/checkpoint-2500/preprocessor_config.json
tokenizer config file saved in ../models/whisper-small-ta/checkpoint-2500/tokenizer_config.json
Special tokens file saved in ../models/whisper-small-ta/checkpoint-2500/special_tokens_map.json
added tokens file saved in ../models/whisper-small-ta/checkpoint-2500/added_tokens.json
Feature extractor saved in ../models/whisper-small-ta/preprocessor_config.json
tokenizer config file saved in ../models/whisper-small-ta/tokenizer_config.json
Special tokens file saved in ../models/whisper-small-ta/special_tokens_map.json
added tokens file saved in ../models/whisper-small-ta/added_tokens.json
Deleting older checkpoint [../models/whisper-small-ta/checkpoint-500] due to args.save_total_limit
Saving model checkpoint to /tmp/tmpvpcmikf5
Configuration saved in /tmp/tmpvpcmikf5/config.json
Model weights saved in /tmp/tmpvpcmikf5/pytorch_model.bin
Feature extractor saved in /tmp/tmpvpcmikf5/preprocessor_config.json
tokenizer config file saved in /tmp/tmpvpcmikf5/tokenizer_config.json
Special tokens file saved in /tmp/tmpvpcmikf5/special_tokens_map.json
added tokens file saved in /tmp/tmpvpcmikf5/added_tokens.json
Saving model checkpoint to ../models/whisper-small-ta
Configuration saved in ../models/whisper-small-ta/config.json
Model weights saved in ../models/whisper-small-ta/pytorch_model.bin
Feature extractor saved in ../models/whisper-small-ta/preprocessor_config.json
tokenizer config file saved in ../models/whisper-small-ta/tokenizer_config.json
Special tokens file saved in ../models/whisper-small-ta/special_tokens_map.json
added tokens file saved in ../models/whisper-small-ta/added_tokens.json
Dropping the following result as it does not have all the necessary fields:
{'task': {'name': 'Automatic Speech Recognition', 'type': 'automatic-speech-recognition'}}
Several commits (2) will be pushed upstream.
The progress bars may be unreliable.
Upload file pytorch_model.bin:   0%|          | 32.0k/922M [00:00<?, ?B/s]
remote: Scanning LFS files for validity, may be slow...        
remote: LFS file scan complete.        
To https://huggingface.co/parambharat/whisper-small-ta
   8422cd0..92b9c07  main -> main

***** Running Prediction *****
  Num examples = 100
  Batch size = 16
***** Running Prediction *****
  Num examples = 100
  Batch size = 16
***** Running Evaluation *****
  Num examples = 100
  Batch size = 16
***** Running Prediction *****
  Num examples = 100
  Batch size = 16
Saving model checkpoint to ../models/whisper-small-ta/checkpoint-3000
Configuration saved in ../models/whisper-small-ta/checkpoint-3000/config.json
Model weights saved in ../models/whisper-small-ta/checkpoint-3000/pytorch_model.bin
Feature extractor saved in ../models/whisper-small-ta/checkpoint-3000/preprocessor_config.json
tokenizer config file saved in ../models/whisper-small-ta/checkpoint-3000/tokenizer_config.json
Special tokens file saved in ../models/whisper-small-ta/checkpoint-3000/special_tokens_map.json
added tokens file saved in ../models/whisper-small-ta/checkpoint-3000/added_tokens.json
Feature extractor saved in ../models/whisper-small-ta/preprocessor_config.json
tokenizer config file saved in ../models/whisper-small-ta/tokenizer_config.json
Special tokens file saved in ../models/whisper-small-ta/special_tokens_map.json
added tokens file saved in ../models/whisper-small-ta/added_tokens.json
Deleting older checkpoint [../models/whisper-small-ta/checkpoint-1000] due to args.save_total_limit
Saving model checkpoint to /tmp/tmphl8t83yi
Configuration saved in /tmp/tmphl8t83yi/config.json
Model weights saved in /tmp/tmphl8t83yi/pytorch_model.bin
Feature extractor saved in /tmp/tmphl8t83yi/preprocessor_config.json
tokenizer config file saved in /tmp/tmphl8t83yi/tokenizer_config.json
Special tokens file saved in /tmp/tmphl8t83yi/special_tokens_map.json
added tokens file saved in /tmp/tmphl8t83yi/added_tokens.json
Saving model checkpoint to ../models/whisper-small-ta
Configuration saved in ../models/whisper-small-ta/config.json
Model weights saved in ../models/whisper-small-ta/pytorch_model.bin
Feature extractor saved in ../models/whisper-small-ta/preprocessor_config.json
tokenizer config file saved in ../models/whisper-small-ta/tokenizer_config.json
Special tokens file saved in ../models/whisper-small-ta/special_tokens_map.json
added tokens file saved in ../models/whisper-small-ta/added_tokens.json
Dropping the following result as it does not have all the necessary fields:
{'task': {'name': 'Automatic Speech Recognition', 'type': 'automatic-speech-recognition'}}
Several commits (2) will be pushed upstream.
The progress bars may be unreliable.
Upload file pytorch_model.bin:   0%|          | 32.0k/922M [00:00<?, ?B/s]
remote: Scanning LFS files for validity, may be slow...        
remote: LFS file scan complete.        
To https://huggingface.co/parambharat/whisper-small-ta
   92b9c07..b375c14  main -> main

***** Running Prediction *****
  Num examples = 100
  Batch size = 16
***** Running Prediction *****
  Num examples = 100
  Batch size = 16
***** Running Evaluation *****
  Num examples = 100
  Batch size = 16
***** Running Prediction *****
  Num examples = 100
  Batch size = 16
Saving model checkpoint to ../models/whisper-small-ta/checkpoint-3500
Configuration saved in ../models/whisper-small-ta/checkpoint-3500/config.json
Model weights saved in ../models/whisper-small-ta/checkpoint-3500/pytorch_model.bin
Feature extractor saved in ../models/whisper-small-ta/checkpoint-3500/preprocessor_config.json
tokenizer config file saved in ../models/whisper-small-ta/checkpoint-3500/tokenizer_config.json
Special tokens file saved in ../models/whisper-small-ta/checkpoint-3500/special_tokens_map.json
added tokens file saved in ../models/whisper-small-ta/checkpoint-3500/added_tokens.json
Feature extractor saved in ../models/whisper-small-ta/preprocessor_config.json
tokenizer config file saved in ../models/whisper-small-ta/tokenizer_config.json
Special tokens file saved in ../models/whisper-small-ta/special_tokens_map.json
added tokens file saved in ../models/whisper-small-ta/added_tokens.json
Deleting older checkpoint [../models/whisper-small-ta/checkpoint-1500] due to args.save_total_limit
Saving model checkpoint to /tmp/tmpg3qb4don
Configuration saved in /tmp/tmpg3qb4don/config.json
Model weights saved in /tmp/tmpg3qb4don/pytorch_model.bin
Feature extractor saved in /tmp/tmpg3qb4don/preprocessor_config.json
tokenizer config file saved in /tmp/tmpg3qb4don/tokenizer_config.json
Special tokens file saved in /tmp/tmpg3qb4don/special_tokens_map.json
added tokens file saved in /tmp/tmpg3qb4don/added_tokens.json
Saving model checkpoint to ../models/whisper-small-ta
Configuration saved in ../models/whisper-small-ta/config.json
Model weights saved in ../models/whisper-small-ta/pytorch_model.bin
Feature extractor saved in ../models/whisper-small-ta/preprocessor_config.json
tokenizer config file saved in ../models/whisper-small-ta/tokenizer_config.json
Special tokens file saved in ../models/whisper-small-ta/special_tokens_map.json
added tokens file saved in ../models/whisper-small-ta/added_tokens.json
Dropping the following result as it does not have all the necessary fields:
{'task': {'name': 'Automatic Speech Recognition', 'type': 'automatic-speech-recognition'}}
Several commits (2) will be pushed upstream.
The progress bars may be unreliable.
Upload file pytorch_model.bin:   0%|          | 32.0k/922M [00:00<?, ?B/s]
remote: Scanning LFS files for validity, may be slow...        
remote: LFS file scan complete.        
To https://huggingface.co/parambharat/whisper-small-ta
   b375c14..58592c9  main -> main

***** Running Prediction *****
  Num examples = 100
  Batch size = 16
***** Running Prediction *****
  Num examples = 100
  Batch size = 16
***** Running Evaluation *****
  Num examples = 100
  Batch size = 16
***** Running Prediction *****
  Num examples = 100
  Batch size = 16
Saving model checkpoint to ../models/whisper-small-ta/checkpoint-4000
Configuration saved in ../models/whisper-small-ta/checkpoint-4000/config.json
Model weights saved in ../models/whisper-small-ta/checkpoint-4000/pytorch_model.bin
Feature extractor saved in ../models/whisper-small-ta/checkpoint-4000/preprocessor_config.json
tokenizer config file saved in ../models/whisper-small-ta/checkpoint-4000/tokenizer_config.json
Special tokens file saved in ../models/whisper-small-ta/checkpoint-4000/special_tokens_map.json
added tokens file saved in ../models/whisper-small-ta/checkpoint-4000/added_tokens.json
Feature extractor saved in ../models/whisper-small-ta/preprocessor_config.json
tokenizer config file saved in ../models/whisper-small-ta/tokenizer_config.json
Special tokens file saved in ../models/whisper-small-ta/special_tokens_map.json
added tokens file saved in ../models/whisper-small-ta/added_tokens.json
Deleting older checkpoint [../models/whisper-small-ta/checkpoint-2000] due to args.save_total_limit
Saving model checkpoint to /tmp/tmpn8kp2k4c
Configuration saved in /tmp/tmpn8kp2k4c/config.json
Model weights saved in /tmp/tmpn8kp2k4c/pytorch_model.bin
Feature extractor saved in /tmp/tmpn8kp2k4c/preprocessor_config.json
tokenizer config file saved in /tmp/tmpn8kp2k4c/tokenizer_config.json
Special tokens file saved in /tmp/tmpn8kp2k4c/special_tokens_map.json
added tokens file saved in /tmp/tmpn8kp2k4c/added_tokens.json
Saving model checkpoint to ../models/whisper-small-ta
Configuration saved in ../models/whisper-small-ta/config.json
Model weights saved in ../models/whisper-small-ta/pytorch_model.bin
Feature extractor saved in ../models/whisper-small-ta/preprocessor_config.json
tokenizer config file saved in ../models/whisper-small-ta/tokenizer_config.json
Special tokens file saved in ../models/whisper-small-ta/special_tokens_map.json
added tokens file saved in ../models/whisper-small-ta/added_tokens.json
Dropping the following result as it does not have all the necessary fields:
{'task': {'name': 'Automatic Speech Recognition', 'type': 'automatic-speech-recognition'}}
Several commits (2) will be pushed upstream.
The progress bars may be unreliable.
Upload file pytorch_model.bin:   0%|          | 32.0k/922M [00:00<?, ?B/s]
remote: Scanning LFS files for validity, may be slow...        
remote: LFS file scan complete.        
To https://huggingface.co/parambharat/whisper-small-ta
   58592c9..e1b1724  main -> main

***** Running Prediction *****
  Num examples = 100
  Batch size = 16
***** Running Prediction *****
  Num examples = 100
  Batch size = 16
***** Running Evaluation *****
  Num examples = 100
  Batch size = 16
***** Running Prediction *****
  Num examples = 100
  Batch size = 16
Saving model checkpoint to ../models/whisper-small-ta/checkpoint-4500
Configuration saved in ../models/whisper-small-ta/checkpoint-4500/config.json
Model weights saved in ../models/whisper-small-ta/checkpoint-4500/pytorch_model.bin
Feature extractor saved in ../models/whisper-small-ta/checkpoint-4500/preprocessor_config.json
tokenizer config file saved in ../models/whisper-small-ta/checkpoint-4500/tokenizer_config.json
Special tokens file saved in ../models/whisper-small-ta/checkpoint-4500/special_tokens_map.json
added tokens file saved in ../models/whisper-small-ta/checkpoint-4500/added_tokens.json
Feature extractor saved in ../models/whisper-small-ta/preprocessor_config.json
tokenizer config file saved in ../models/whisper-small-ta/tokenizer_config.json
Special tokens file saved in ../models/whisper-small-ta/special_tokens_map.json
added tokens file saved in ../models/whisper-small-ta/added_tokens.json
Deleting older checkpoint [../models/whisper-small-ta/checkpoint-2500] due to args.save_total_limit
Saving model checkpoint to /tmp/tmpm31_ft97
Configuration saved in /tmp/tmpm31_ft97/config.json
Model weights saved in /tmp/tmpm31_ft97/pytorch_model.bin
Feature extractor saved in /tmp/tmpm31_ft97/preprocessor_config.json
tokenizer config file saved in /tmp/tmpm31_ft97/tokenizer_config.json
Special tokens file saved in /tmp/tmpm31_ft97/special_tokens_map.json
added tokens file saved in /tmp/tmpm31_ft97/added_tokens.json
Saving model checkpoint to ../models/whisper-small-ta
Configuration saved in ../models/whisper-small-ta/config.json
Model weights saved in ../models/whisper-small-ta/pytorch_model.bin
Feature extractor saved in ../models/whisper-small-ta/preprocessor_config.json
tokenizer config file saved in ../models/whisper-small-ta/tokenizer_config.json
Special tokens file saved in ../models/whisper-small-ta/special_tokens_map.json
added tokens file saved in ../models/whisper-small-ta/added_tokens.json
Dropping the following result as it does not have all the necessary fields:
{'task': {'name': 'Automatic Speech Recognition', 'type': 'automatic-speech-recognition'}}
Several commits (2) will be pushed upstream.
The progress bars may be unreliable.
Upload file pytorch_model.bin:   0%|          | 32.0k/922M [00:00<?, ?B/s]
remote: Scanning LFS files for validity, may be slow...        
remote: LFS file scan complete.        
To https://huggingface.co/parambharat/whisper-small-ta
   e1b1724..5245ff0  main -> main

***** Running Prediction *****
  Num examples = 100
  Batch size = 16
***** Running Prediction *****
  Num examples = 100
  Batch size = 16
***** Running Evaluation *****
  Num examples = 100
  Batch size = 16
***** Running Prediction *****
  Num examples = 100
  Batch size = 16
Saving model checkpoint to ../models/whisper-small-ta/checkpoint-5000
Configuration saved in ../models/whisper-small-ta/checkpoint-5000/config.json
Model weights saved in ../models/whisper-small-ta/checkpoint-5000/pytorch_model.bin
Feature extractor saved in ../models/whisper-small-ta/checkpoint-5000/preprocessor_config.json
tokenizer config file saved in ../models/whisper-small-ta/checkpoint-5000/tokenizer_config.json
Special tokens file saved in ../models/whisper-small-ta/checkpoint-5000/special_tokens_map.json
added tokens file saved in ../models/whisper-small-ta/checkpoint-5000/added_tokens.json
Feature extractor saved in ../models/whisper-small-ta/preprocessor_config.json
tokenizer config file saved in ../models/whisper-small-ta/tokenizer_config.json
Special tokens file saved in ../models/whisper-small-ta/special_tokens_map.json
added tokens file saved in ../models/whisper-small-ta/added_tokens.json
Deleting older checkpoint [../models/whisper-small-ta/checkpoint-3000] due to args.save_total_limit
Saving model checkpoint to /tmp/tmpf7aeicqp
Configuration saved in /tmp/tmpf7aeicqp/config.json
Model weights saved in /tmp/tmpf7aeicqp/pytorch_model.bin
Feature extractor saved in /tmp/tmpf7aeicqp/preprocessor_config.json
tokenizer config file saved in /tmp/tmpf7aeicqp/tokenizer_config.json
Special tokens file saved in /tmp/tmpf7aeicqp/special_tokens_map.json
added tokens file saved in /tmp/tmpf7aeicqp/added_tokens.json
Saving model checkpoint to ../models/whisper-small-ta
Configuration saved in ../models/whisper-small-ta/config.json
Model weights saved in ../models/whisper-small-ta/pytorch_model.bin
Feature extractor saved in ../models/whisper-small-ta/preprocessor_config.json
tokenizer config file saved in ../models/whisper-small-ta/tokenizer_config.json
Special tokens file saved in ../models/whisper-small-ta/special_tokens_map.json
added tokens file saved in ../models/whisper-small-ta/added_tokens.json
Dropping the following result as it does not have all the necessary fields:
{'task': {'name': 'Automatic Speech Recognition', 'type': 'automatic-speech-recognition'}}
Several commits (2) will be pushed upstream.
The progress bars may be unreliable.
Upload file pytorch_model.bin:   0%|          | 32.0k/922M [00:00<?, ?B/s]
remote: Scanning LFS files for validity, may be slow...        
remote: LFS file scan complete.        
To https://huggingface.co/parambharat/whisper-small-ta
   5245ff0..0ea1415  main -> main



Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from ../models/whisper-small-ta/checkpoint-4500 (score: 17.096931320019486).
***** Running Prediction *****
  Num examples = 100
  Batch size = 16
/home/ubuntu/whisper-finetuning/notebooks/../models/whisper-small-ta is already a clone of https://huggingface.co/parambharat/whisper-small-ta. Make sure you pull the latest changes with `repo.git_pull()`.
max_steps is given, it will override any value given in num_train_epochs
Using cuda_amp half precision backend
Saving model checkpoint to /tmp/tmp0p_p2txp
Configuration saved in /tmp/tmp0p_p2txp/config.json
Model weights saved in /tmp/tmp0p_p2txp/pytorch_model.bin
Feature extractor saved in /tmp/tmp0p_p2txp/preprocessor_config.json
tokenizer config file saved in /tmp/tmp0p_p2txp/tokenizer_config.json
Special tokens file saved in /tmp/tmp0p_p2txp/special_tokens_map.json
added tokens file saved in /tmp/tmp0p_p2txp/added_tokens.json
Saving model checkpoint to ../models/whisper-small-ta
Configuration saved in ../models/whisper-small-ta/config.json
Model weights saved in ../models/whisper-small-ta/pytorch_model.bin
Feature extractor saved in ../models/whisper-small-ta/preprocessor_config.json
tokenizer config file saved in ../models/whisper-small-ta/tokenizer_config.json
Special tokens file saved in ../models/whisper-small-ta/special_tokens_map.json
added tokens file saved in ../models/whisper-small-ta/added_tokens.json
remote: Scanning LFS files for validity, may be slow...        
remote: LFS file scan complete.        
To https://huggingface.co/parambharat/whisper-small-ta
   0ea1415..825c5f6  main -> main

Dropping the following result as it does not have all the necessary fields:
{'task': {'name': 'Automatic Speech Recognition', 'type': 'automatic-speech-recognition'}}
To https://huggingface.co/parambharat/whisper-small-ta
   825c5f6..93e26e3  main -> main

Out[31]:
TrainOutput(global_step=5000, training_loss=0.23831620330810546, metrics={'train_runtime': 86386.8863, 'train_samples_per_second': 3.704, 'train_steps_per_second': 0.058, 'total_flos': 9.23473281024e+19, 'train_loss': 0.23831620330810546, 'epoch': 1.0})
In [33]:
# Metadata forwarded to `trainer.push_to_hub` below; it populates the
# auto-generated model card on the Hugging Face Hub.
kwargs = dict(
    language="ta",
    model_name="Whisper Small Ta - Bharat Ramanathan",  # display name for the model card
    finetuned_from="openai/whisper-small",
    tasks="automatic-speech-recognition",
    tags="whisper-event",
)
In [34]:
trainer.push_to_hub(**kwargs)
Saving model checkpoint to ../models/whisper-small-ta
Configuration saved in ../models/whisper-small-ta/config.json
Model weights saved in ../models/whisper-small-ta/pytorch_model.bin
Feature extractor saved in ../models/whisper-small-ta/preprocessor_config.json
tokenizer config file saved in ../models/whisper-small-ta/tokenizer_config.json
Special tokens file saved in ../models/whisper-small-ta/special_tokens_map.json
added tokens file saved in ../models/whisper-small-ta/added_tokens.json
Dropping the following result as it does not have all the necessary fields:
{'task': {'name': 'Automatic Speech Recognition', 'type': 'automatic-speech-recognition'}, 'metrics': [{'name': 'Wer', 'type': 'wer', 'value': 17.145640526059424}]}
To https://huggingface.co/parambharat/whisper-small-ta
   93e26e3..d23d815  main -> main

In [35]:
wandb.finish()
Waiting for W&B process to finish... (success).
VBox(children=(Label(value='9542.852 MB of 9542.852 MB uploaded (315.318 MB deduped)\r'), FloatProgress(value=…

Run history:


eval/loss█▅▄▃▃▂▂▁▁▁
eval/runtime▂▄▂▁█▁▂▃▂▅
eval/samples_per_second▇▅▇▇▁█▇▆▇▄
eval/steps_per_second█▆██▁██▆█▆
eval/wer█▅▄▄▂▂▁▁▁▁
train/epoch▁▁▁▂▂▂▂▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▇▇▇▇▇████
train/global_step▁▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇█████
train/learning_rate▅██▇▇▆▆▆▅▅▅▄▄▃▃▃▂▂▁▁
train/loss█▇▆▅▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁
train/total_flos▁
train/train_loss▁
train/train_runtime▁
train/train_samples_per_second▁
train/train_steps_per_second▁

Run summary:


eval/loss0.18032
eval/runtime108.2177
eval/samples_per_second0.924
eval/steps_per_second0.065
eval/wer17.14564
train/epoch1.0
train/global_step5000
train/learning_rate0.0
train/loss0.1894
train/total_flos9.23473281024e+19
train/train_loss0.23832
train/train_runtime86386.8863
train/train_samples_per_second3.704
train/train_steps_per_second0.058

Synced quiet-sun-53: https://wandb.ai/parambharat/whisper_finetuning/runs/17xqqp5b
Synced 3 W&B file(s), 31 media file(s), 140 artifact file(s) and 0 other file(s)
Find logs at: ./wandb/run-20221212_073428-17xqqp5b/logs
In [ ]: